IMR90 data
processing
#ER_samples =======================
df_er<-df[, grep("ER", colnames(df))]
df_er.pca = prcomp(t(df_er), center = TRUE, scale = TRUE)
group_er<-factor(c(rep("ER_0", 3), rep("ER_4", 3), rep("ER_8", 3),
rep("ER_12", 3), rep("ER_16", 3), rep("ER_20", 3),
rep("ER_24", 3), rep("ER_32", 3), rep("ER_40", 2), rep("ER_48",3)),
levels = c("ER_0", "ER_4", "ER_8", "ER_12", "ER_16", "ER_20",
"ER_24", "ER_32", "ER_40", "ER_48"))
df_er.plot=data.frame(df_er.pca$x[,1],df_er.pca$x[,2],group_er, rownames(df_er.pca$x))
colnames(df_er.plot)=c("PC1","PC2","group_er","cond")
rownames(df_er.plot)=rownames(df_er.pca$x)
percentage_er <- round(df_er.pca$sdev / sum(df_er.pca$sdev) * 100, 2)
percentage_er_lab <- paste(colnames(df_er.plot), "(", paste( as.character(percentage_er), "%", ")", sep=""))
#GFP_samples =========================
df_gfp<-df[, grep("GFP", colnames(df))]
df_gfp.pca = prcomp(t(df_gfp), center = TRUE, scale = TRUE)
group_gfp<-factor(c(rep("GFP_0", 3), rep("GFP_4", 2), rep("GFP_8", 3),
rep("GFP_12", 1), rep("GFP_16", 3), rep("GFP_20", 3),
rep("GFP_24", 3), rep("GFP_32", 3), rep("GFP_40", 3), rep("GFP_48",3)),
levels = c("GFP_0", "GFP_4", "GFP_8", "GFP_12", "GFP_16", "GFP_20",
"GFP_24", "GFP_32", "GFP_40", "GFP_48"))
df_gfp.plot=data.frame(df_gfp.pca$x[,1],df_gfp.pca$x[,2],group_gfp, rownames(df_gfp.pca$x))
colnames(df_gfp.plot)=c("PC1","PC2","group_gfp","cond")
rownames(df_gfp.plot)=rownames(df_gfp.pca$x)
percentage_gfp <- round(df_gfp.pca$sdev / sum(df_gfp.pca$sdev) * 100, 2)
percentage_gfp_lab <- paste(colnames(df_gfp.plot), "(", paste( as.character(percentage_gfp), "%", ")", sep=""))
Plotting 3D PCA plot of
IMR90 ER and GFP samples
tot_explained_variance_ratio_er <- summary(df_er.pca)[["importance"]]['Proportion of Variance',]
tot_explained_variance_ratio_er<- 100 * sum(tot_explained_variance_ratio_er)
tit_er<-paste0("Total Explained Variance = ", tot_explained_variance_ratio_er, "\n PCA of normalized IMR90 ER samples")
components_er<-data.frame(df_er.pca$x[,1],df_er.pca$x[,2],df_er.pca$x[,3], group_er, rownames(df_er.pca$x))
colnames(components_er)<-c("PC1","PC2", "PC3", "group_er", "sample_names")
axx <- list(
title = paste0("PC1 (", percentage_er[1], ")" ))
axy <- list(
title = paste0("PC2 (", percentage_er[2], ")" ))
axz <- list(
title = paste0("PC3 (", percentage_er[3], ")" ))
fig_er <- plot_ly(components_er, x = ~PC1, y = ~PC2, z = ~PC3, color = ~group_er, colors = c('#636EFA','#EF553B','#00CC96')) %>%
add_markers(size = 28)
fig_er <- fig_er %>%
layout(
title = tit_er,
scene = list(bgcolor = "white",
xaxis=axx,
yaxis=axy,
zaxis=axz)
)
fig_er
tot_explained_variance_ratio_gfp <- summary(df_gfp.pca)[["importance"]]['Proportion of Variance',]
tot_explained_variance_ratio_gfp<- 100 * sum(tot_explained_variance_ratio_gfp)
tit_gfp<-paste0("Total Explained Variance = ", tot_explained_variance_ratio_gfp, "\n PCA of normalized IMR90 GFP samples")
components_gfp<-data.frame(df_gfp.pca$x[,1],df_gfp.pca$x[,2],df_gfp.pca$x[,3], group_gfp, rownames(df_gfp.pca$x))
colnames(components_gfp)<-c("PC1","PC2", "PC3", "group_gfp", "sample_names")
axx <- list(
title = paste0("PC1 (", percentage_gfp[1], ")" ))
axy <- list(
title = paste0("PC2 (", percentage_gfp[2], ")" ))
axz <- list(
title = paste0("PC3 (", percentage_gfp[3], ")" ))
fig_gfp <- plot_ly(components_gfp, x = ~PC1, y = ~PC2, z = ~PC3, color = ~group_gfp, colors = c('#636EFA','#EF553B','#00CC96')) %>%
add_markers(size = 28)
fig_gfp <- fig_gfp %>%
layout(
title = tit_gfp,
scene = list(bgcolor = "white",
xaxis=axx,
yaxis=axy,
zaxis=axz))
fig_gfp
Gene co-expression
analysis by WGCNA with IMR90 ER samples
#Clustering by WGCNA (IMR90 48h ER vs.GFP DEGs with abs lfc > = 0 & padj <= 0.05, total 10513 genes)
nettype = "signed"
powers = c(c(1:10), seq(from = 12, to=20, by=2))
IMR90_ER_expression_data<-IMR90_ER_sig_DEGs
sft_ER=pickSoftThreshold(t(IMR90_ER_expression_data), dataIsExpr = TRUE, powerVector = powers, networkType = nettype, verbose = 5)
# Scale-free topology fit index as a function of the soft-thresholding power
plot(sft_ER$fitIndices[,1], -sign(sft_ER$fitIndices[,3])*sft_ER$fitIndices[,2],
xlab="Soft Threshold (power)",ylab= paste0("Scale Free Topology Model Fit ", nettype, " R^2"), type="n",
main = paste("Scale independence"))+text(sft_ER$fitIndices[,1], -sign(sft_ER$fitIndices[,3])*sft_ER$fitIndices[,2],
labels=powers,cex=1,col="red")+abline(h=0.90,col="red")
# Mean connectivity as a function of the soft-thresholding power
plot(sft_ER$fitIndices[,1], sft_ER$fitIndices[,5],
xlab="Soft Threshold (power)",ylab="Mean Connectivity", type="n",
main = paste("Mean connectivity"))+text(sft_ER$fitIndices[,1], sft_ER$fitIndices[,5], labels=powers, cex=1, col="red")
##Create co-expression matrix=========================
cor <- WGCNA::cor
net_ER = blockwiseModules(
t(IMR90_ER_expression_data),
power = 16 , #https://horvath.genetics.ucla.edu/html/CoexpressionNetwork/Rpackages/WGCNA/faq.html Q#6 or based on powerEstimate
TOMType = "signed",
minModuleSize = 30, # We like large modules, so we set the minimum module size relatively high
reassignThreshold = 0,
mergeCutHeight = 0.25,
numericLabels = TRUE,
pamRespectsDendro = FALSE,
saveTOMs = F,
verbose = 3
)
table(net_ER$colors)
cor<-stats::cor
#saveRDS(net_ER, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/WGCNA_net_ER.rds")
Gene modules analysis
on IMR90 ER samples
IMR90_ER_expression_data<-IMR90_ER_sig_DEGs
net_ER<-readRDS(file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/WGCNA_net_ER.rds")
sizeGrWindow(12,10)
moduleLabels = net_ER$colors
mergedColors = labels2colors(net_ER$colors)
plotDendroAndColors(net_ER$dendrograms[[1]], mergedColors[net_ER$blockGenes[[1]]],
"Module colors",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05)
reference_df<-data.frame(gene = names(moduleLabels), number = unname(moduleLabels), color = mergedColors)
MEs = net_ER$MEs;
geneTree = net_ER$dendrograms[[1]];
# Module identification using dynamic tree cut:
minModuleSize = 30
dynamicMods = cutreeDynamic(dendro = geneTree,
deepSplit = 2,
pamRespectsDendro = FALSE,
minClusterSize = minModuleSize);
## Warning in cutreeDynamic(dendro = geneTree, deepSplit = 2, pamRespectsDendro = FALSE, : cutreeDynamic: method "hybrid" requires a valid dissimilarity matrix "distM".
## Defaulting to method "tree".
table(dynamicMods)
## dynamicMods
## 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
## 174 494 370 283 237 192 186 181 174 148 144 143 141 129 102 98 97 94 90 89
## 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
## 81 80 78 76 74 73 70 65 63 62 55 55 53 53 53 52 49 47 45 45
## 40 41 42 43 44
## 43 42 41 40 39
# Convert numeric lables into colors
dynamicColors = labels2colors(dynamicMods)
table(dynamicColors)
## dynamicColors
## black blue brown cyan darkgreen
## 181 370 283 102 78
## darkgrey darkmagenta darkolivegreen darkorange darkred
## 74 53 53 70 80
## darkturquoise floralwhite green greenyellow grey
## 76 39 192 143 174
## grey60 ivory lightcyan lightcyan1 lightgreen
## 94 40 97 41 90
## lightsteelblue1 lightyellow magenta mediumpurple3 midnightblue
## 42 89 148 43 98
## orange orangered4 paleturquoise pink plum1
## 73 45 55 174 45
## purple red royalblue saddlebrown salmon
## 144 186 81 62 129
## sienna3 skyblue skyblue3 steelblue tan
## 52 63 47 55 141
## turquoise violet white yellow yellowgreen
## 494 53 65 237 49
# Plot the dendrogram and colors underneath
#sizeGrWindow(10,8)
plotDendroAndColors(geneTree, dynamicColors, "Dynamic Tree Cut",
dendroLabels = FALSE, hang = 0.03,
addGuide = TRUE, guideHang = 0.05,
main = "Gene dendrogram and module colors")
# Module-trait(timepoint) relationship for ER samples
head(IMR90_ER_expression_data)
## ER_0_1 ER_0_2 ER_0_3 ER_4_1 ER_4_2 ER_4_3 ER_8_1 ER_8_2
## PLXDC2 1.893141 2.312867 2.282067 2.699412 2.702465 2.493034 3.146952 3.288309
## ASF1B 5.072493 5.114994 5.061236 5.106026 5.050805 4.880512 5.392721 5.372936
## KIF2C 3.974835 3.900262 3.848738 3.789110 3.756419 3.980666 4.209206 4.106902
## ANLN 6.498003 6.671398 6.638677 6.631437 6.601337 6.478345 6.784324 6.799676
## PCNA 6.502892 6.504741 6.515332 6.657774 6.616888 6.684128 6.886018 6.898885
## RRM2 6.707925 6.774455 6.836511 6.816400 6.856196 6.800998 7.130278 7.116562
## ER_8_3 ER_12_1 ER_12_2 ER_12_3 ER_16_1 ER_16_2 ER_16_3 ER_20_1
## PLXDC2 3.194628 3.362010 3.412058 3.366088 3.081144 3.157554 3.266120 3.042571
## ASF1B 5.383395 5.919668 5.829346 5.911272 6.315556 6.296359 6.359931 6.409018
## KIF2C 4.310285 4.476799 4.647568 4.624842 4.717777 4.751112 4.812691 5.440505
## ANLN 6.830324 7.120443 7.149276 7.156529 7.527337 7.559166 7.588323 8.038873
## PCNA 6.794903 7.141812 7.138863 7.189974 7.285442 7.257352 7.306676 7.255316
## RRM2 7.084269 7.625058 7.632879 7.701954 8.118555 8.118053 8.134556 8.486527
## ER_20_2 ER_20_3 ER_24_1 ER_24_2 ER_24_3 ER_32_1 ER_32_2 ER_32_3
## PLXDC2 3.281604 3.046997 2.665396 2.615856 2.757200 2.695371 2.611128 2.134501
## ASF1B 6.496424 6.407333 6.382894 6.446343 6.382189 6.197166 6.458424 6.392571
## KIF2C 5.412143 5.474927 5.725647 5.698976 5.704604 5.477043 5.575120 5.582321
## ANLN 8.057891 8.136565 8.235175 8.310818 8.368891 8.364972 8.106027 7.791679
## PCNA 7.279115 7.220253 7.204059 7.146118 7.164607 7.358832 7.313197 7.352046
## RRM2 8.558498 8.549568 8.704232 8.699960 8.718489 9.020252 8.887162 8.749808
## ER_40_1 ER_40_3 ER_48_1 ER_48_2 ER_48_3
## PLXDC2 2.028296 2.133444 2.070650 2.124552 2.204936
## ASF1B 6.662957 6.410296 6.172017 6.288384 6.160422
## KIF2C 5.590952 5.458114 5.347758 5.355094 5.431088
## ANLN 7.260375 8.061378 8.018595 7.960232 8.062980
## PCNA 7.383814 7.308021 7.452807 7.411484 7.407103
## RRM2 8.562631 8.816505 8.809535 8.687479 8.808142
design_ER<-colnames(IMR90_ER_expression_data)
design_ER<-unlist(lapply(design_ER, function(x) substring(x, 1, nchar(x)-2)))
design_ER<-gsub("_48", "_t5",design_ER)
design_ER<-gsub("_40", "_t5",design_ER)
design_ER<-gsub("_32", "_t4",design_ER)
design_ER<-gsub("_24", "_t4",design_ER)
design_ER<-gsub("_20", "_t3",design_ER)
design_ER<-gsub("_16", "_t3",design_ER)
design_ER<-gsub("_12", "_t2",design_ER)
design_ER<-gsub("_8", "_t2",design_ER)
design_ER<-gsub("_4", "_t1",design_ER)
design_ER<-gsub("_0", "_t1",design_ER)
design_er = model.matrix(~0 + design_ER)
colnames(design_er) = levels(as.factor(design_ER))
moduleTraitCor = cor(MEs, design_er, use = "p")
nSamples = ncol(t(IMR90_ER_expression_data))
#moduleTraitPvalue_student = corPvalueStudent(moduleTraitCor, nSamples) #Student asymptotic p-value for correlation
moduleTraitPvalue_fisher = corPvalueFisher(moduleTraitCor, nSamples) #Fisher's asymptotic p-value for correlation
sizeGrWindow(10,6)
# Display correlations and their p-values
textMatrix = paste(signif(moduleTraitCor, 2), "\n(",
signif(moduleTraitPvalue_fisher, 1), ")", sep = "");
dim(textMatrix) = dim(moduleTraitCor)
par(mar = c(5, 5, 5, 5));
# Display the correlation values within a heatmap
colnames(MEs)<-substr(colnames(MEs), 3, nchar(colnames(MEs)))
colnames(MEs)<-paste0("Module ", colnames(MEs))
#moduleTraitCor<-moduleTraitCor[,factor(c("ER_0", "ER_early", "ER_middle", "ER_late"), levels = c("ER_0", "ER_early", "ER_middle", "ER_late"))]
moduleTraitCor<-moduleTraitCor[,factor(c("ER_t1", "ER_t2", "ER_t3", "ER_t4", "ER_t5"), levels = c("ER_t1", "ER_t2", "ER_t3", "ER_t4", "ER_t5"))]
rownames(moduleTraitCor)<-paste0("Module_", unlist(lapply(rownames(moduleTraitCor), function(x) substr(x, 3, nchar(x)))))
#pdf(file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/figure/IMR90_WGCNA_Module-time_period_relationships.pdf")
labeledHeatmap(Matrix = moduleTraitCor,
#xLabels = c("ER_0", "ER_early", "ER_middle", "ER_late"),
xLabels = c("ER_t1", "ER_t2", "ER_t3", "ER_t4", "ER_t5"),
yLabels = names(MEs),
ySymbols = names(MEs),
colorLabels = FALSE,
colors = blueWhiteRed(100),
textMatrix = textMatrix,
setStdMargins = T,
cex.text = 0.4,
zlim = c(-1,1),
main = paste("IMR90_ER: Module-time period relationships"))
#dev.off()
# plot Eigengene adjacency heatmap
#pdf(file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/figure/IMR90_WGCNA_Eigengene_adjacency_heatmap.pdf")
plotEigengeneNetworks(MEs, "", marDendro = c(0,4,1,2), marHeatmap = c(3,4,1,2), cex.lab = 0.8, xLabelsAngle = 30)
#dev.off()
Eigengene analysis on
IMR90 ER samples
# 1. Performing Eigengene analysis on gene modules across time in IMR90 ER samples
ER_time_point<-c("ER_0", "ER_4", "ER_8",
"ER_12", "ER_16", "ER_20",
"ER_24", "ER_32", "ER_40",
"ER_48")
MEs_mean<-data.frame(modules = rownames(t(MEs)))
for (i in 1:length(ER_time_point)){
time_point<-ER_time_point[i]
sub_df<-as.data.frame(t(MEs)[, grep(time_point, colnames(t(MEs)))])
sub_df[,"mean"]<-apply(sub_df, 1, mean)
df_mean<-as.data.frame(sub_df[, "mean"])
colnames(df_mean)<-time_point
MEs_mean<-cbind(MEs_mean, df_mean)
}
rownames(MEs_mean)<-MEs_mean$modules
MEs_mean<-MEs_mean[,-1]
MEs_mean_df<-reshape2::melt(as.matrix(MEs_mean))
colnames(MEs_mean_df)<-c("Module", "time", "Eigengene")
MEs_mean_df$time<-unlist(lapply(as.character(MEs_mean_df$time), function(x) strsplit(x, "_")[[1]][2]))
MEs_mean_df$time<-as.numeric(MEs_mean_df$time)
MEs_mean_df$Eigengene<-as.numeric(MEs_mean_df$Eigengene)
theme<-theme(panel.background = element_blank(),
panel.border=element_rect(fill=NA),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background=element_blank(),
axis.text.x=element_text(colour="black"),
axis.text.y=element_text(colour="black"),
axis.ticks=element_line(colour="black"),
plot.margin=unit(c(1,1,1,1),"line"),
axis.title.x = element_text(color="black", size=20, face="bold"),
axis.title.y = element_text(color="black", size=20, face="bold"),
axis.text = element_text(size = 20, face = "bold"))
for (i in 1:length(rownames(MEs_mean))){
module<-rownames(MEs_mean)[i]
df_module<-MEs_mean_df[MEs_mean_df$Module == module,]
p<-ggplot(df_module,
aes(x=factor(time, levels = c(0,4,8,12,16,20,24,32,40,48)),
y=Eigengene, colour=Module, group=Module, fill=Module)) +
geom_line(size =0.1)+
geom_point(size=0.5)+
ylim(-0.4, 0.4)+
labs(x = 'time (h)', y = "Eigengene",title = 'Eigengenes of the each WGCNA modules')
#plot(p)
}
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# 2. Putting the dynamic Eigengene curve of each module into groups based on the distance of modules
p1<-ggplot(MEs_mean_df[MEs_mean_df$Module %in% c("Module 14", "Module 1", "Module 11"), ],
mapping = aes(x=factor(time, levels = c(0,4,8,12,16,20,24,32,40,48)), y=Eigengene, group = Module, fill = Module)) +
geom_line(aes(color=Module), size = 0.8)+
geom_point(size=2, shape = 21)+
scale_fill_brewer(palette = "Blues")+
scale_color_brewer(palette = "Blues")+
ylim(-0.4, 0.4)+
labs(x = 'time (h)', y = "Eigengene")
p1<-p1+theme
p2<-ggplot(MEs_mean_df[MEs_mean_df$Module %in% c("Module 13", "Module 5", "Module 10", "Module 12"), ],
mapping = aes(x=factor(time, levels = c(0,4,8,12,16,20,24,32,40,48)), y=Eigengene, group = Module, fill = Module)) +
geom_line(aes(color=Module), size = 0.8)+
geom_point(size=2, shape = 21)+
scale_fill_brewer(palette = "Greens")+
scale_color_brewer(palette = "Greens")+
ylim(-0.4, 0.4)+
labs(x = 'time (h)', y = "Eigengene")
p2<-p2+theme
p3<-ggplot(MEs_mean_df[MEs_mean_df$Module %in% c("Module 4", "Module 3", "Module 8"), ],
mapping = aes(x=factor(time, levels = c(0,4,8,12,16,20,24,32,40,48)), y=Eigengene, group = Module, fill = Module)) +
geom_line(aes(color=Module), size = 0.8)+
geom_point(size=2, shape = 21)+
scale_fill_brewer(palette = "Oranges")+
scale_color_brewer(palette = "Oranges")+
ylim(-0.4, 0.4)+
labs(x = 'time (h)', y = "Eigengene")
p3<-p3+theme
p4<-ggplot(MEs_mean_df[MEs_mean_df$Module %in% c("Module 6", "Module 9", "Module 7", "Module 2"), ],
mapping = aes(x=factor(time, levels = c(0,4,8,12,16,20,24,32,40,48)), y=Eigengene, group = Module, fill = Module)) +
geom_line(aes(color=Module), size = 0.8)+
geom_point(size=2, shape = 21)+
scale_fill_brewer(palette = "Purples")+
scale_color_brewer(palette = "Purples")+
ylim(-0.4, 0.4)+
labs(x = 'time (h)', y = "Eigengene")
p4<-p4+theme
WGCNA_Eigengenes_plot<-ggarrange(p1, p2, p3, p4,
ncol = 2,
nrow = 2
)
WGCNA_Eigengenes_plot

#pdf(file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/figure/IMR90_WGCNA_Eigengenes_dynamics.pdf", width = 12, height = 8)
#WGCNA_Eigengenes_plot
#dev.off()
GO-terms analysis on
gene modules from IMR90 ER samples
#genemart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))
#saveRDS(genemart, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/genemart.rds")
genemart<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/genemart.rds")
probes = colnames(t(IMR90_ER_expression_data))
g_list<-getBM(filters = "hgnc_symbol",
attributes= c("ensembl_gene_id","hgnc_symbol", "entrezgene_id"),values=probes,mart= genemart)
gene_list<-list()
for (i in 1:length(unique(moduleLabels))){
number<-unique(moduleLabels)[i]
module_name<-paste0("Module_", number)
modnumber<-probes[moduleLabels == number]
df_genes<-g_list[g_list$hgnc_symbol %in% modnumber,"ensembl_gene_id"]
gene_list[[module_name]]<-df_genes
}
#names(gene_list)<-paste0("Module_", unique(moduleLabels))
gene_df<-list()
module_name_list<-paste0("Module_", c(1:14))
for (i in 1:14){
module_name<-module_name_list[i] #remove Module_0, since it's the module with genes that do not co-express with others
genes<-gene_list[[module_name]]
df_genes<-g_list[g_list$ensembl_gene_id %in% genes,]
df_genes<-df_genes[!duplicated(df_genes$hgnc_symbol),]
gene_df[[module_name]]<-df_genes
}
##GO-Term enrichment based on WGCNA network================================
WGCNA_modules_go_result<-list()
for (i in 1:length(gene_df)){
module_name<-names(gene_df)[i]
genes<-gene_df[[module_name]]
genes<-genes$hgnc_symbol
em_ER_BP<-enrichGO(genes,
OrgDb = org.Hs.eg.db,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.05,
qvalueCutoff = 0.25,
keyType = 'SYMBOL')
WGCNA_modules_go_result[[module_name]]<-em_ER_BP@result
}
#saveRDS(WGCNA_modules_go_result, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/IMR90_WGCNA_modules_go_result.RDS")
Significant Go terms
visualization for WGCNA modules
WGCNA_modules_go_result<-readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/IMR90_WGCNA_modules_go_result.RDS")
color_pal<-c(brewer.pal(n = 3, "Blues"), brewer.pal(n = 4, "Greens"), brewer.pal(n = 3, "Oranges"), brewer.pal(n = 4, "Purples"))
sig_go_results_list_order<-paste0("Module_", c(14,1,11,13,5,10,12,4,3,8,6,9,7,2))
color_pal_new<-color_pal[c(1:9,11,13:14)]
sig_go_results_df<-data.frame()
for (i in 1:14){
module<-as.character(sig_go_results_list_order[i])
go_result<-WGCNA_modules_go_result[[module]]
go_result[,"module"]<-module
go_result<-go_result[go_result$p.adjust <= 0.05, ]
go_result<-go_result[order(as.numeric(go_result$p.adjust), decreasing = F), ][1:20,]
sig_go_results_df<-rbind(sig_go_results_df, go_result)
}
sig_go_results_df<-na.omit(sig_go_results_df)
sig_go_results_df<-sig_go_results_df[!duplicated(sig_go_results_df$Description),]
#write.csv(sig_go_results_df, file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/table/IMR90_WGCNA_modules_sig_go_result.csv")
p<-ggplot(data=sig_go_results_df, aes(x=factor(Description, levels = sig_go_results_df$Description), y= -log10(p.adjust)), fill = module)+
geom_col(aes(fill = module), colour = "black", width=0.85, position=position_dodge(0.5)) +
theme(axis.text.x = element_text(size = 10, angle=90, vjust=.5, hjust=1, face = "bold"),
panel.background = element_blank(),
legend.position= c(0.13, 0.7),
legend.direction = "horizontal",
legend.title = element_text(colour="black", size=20,
face="bold"),
legend.text = element_text(colour="black", size=15,
face="bold"),
legend.background = element_rect(fill="grey90",
linewidth =0.5, linetype="solid"),
axis.text.y = element_text(size = 15, face = "bold"),
plot.title = element_text(size = 20, face = "bold"),
axis.title = element_text(size = 20, face = "bold"),
axis.line = element_line(colour = "black"))+
ylab("-log10(p.adjust)") +
xlab("GO Terms")+
scale_y_continuous(breaks = seq(0, -log10(min(sig_go_results_df$p.adjust))*1.0, by = 5))+
scale_fill_manual(values=color_pal_new, limits = c(unique(sig_go_results_df$module)))+
scale_x_discrete(labels = function(x) str_wrap(x, width = 65))
p

#pdf(file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/figure/IMR90_WGCNA_Modules_GO_enrichment.pdf", width = 35, height = 15)
#plot(p)
#dev.off()
Network centrality
analysis and network visualization of gene modules from WGCNA
#adjMatrix <- adjacency(t(IMR90_ER_expression_data), power = 16, type = "signed")
#TOM = TOMsimilarity(adjMatrix)
#saveRDS(TOM, file = "/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/WGCNA_TOM.rds")
TOM = readRDS("/xdisk/mpadi/jiawenyang/data/merkel_cell/pp_in_mcc_paper/imr90_wgcna/WGCNA_TOM.rds")
probes = colnames(t(IMR90_ER_expression_data))
dimnames(TOM)<-list(probes, probes)
# 1.Selecting top hub genes from each gene module
network_hub_list<-list()
nTOM = 40 #top nodes from the WGCNA network
for (i in 1:length(gene_df)){
module<-names(gene_df)[i]
genes<-gene_df[[module]][,"hgnc_symbol"]
m<-TOM[genes, genes]
hubs<-names(sort(rowSums(m), decreasing = T)[1:nTOM])
network_hub_list[[module]]<-hubs
}
network_hub_list # The top 40 hubs in each module
## $Module_1
## [1] "NIBAN2" "CAPN1" "INF2" "MGAT4B" "VPS28" "FLOT2" "FKBP8"
## [8] "FAM3A" "GNB2" "CDIPT" "G6PD" "METRN" "RNPEPL1" "EHD2"
## [15] "APRT" "TSPAN4" "FBXW5" "PACS2" "CAPG" "TUBGCP2" "MAPK3"
## [22] "BLVRB" "SLC27A1" "VAT1" "GNAI2" "IGFBP6" "DBNL" "PIP5K1C"
## [29] "SPON2" "MVP" "IFITM3" "ERCC2" "PGLS" "LY6E" "NISCH"
## [36] "TGFB1I1" "VEGFB" "GSN" "NPDC1" "TSC2"
##
## $Module_2
## [1] "FANCD2" "MAD2L1" "SMARCC1" "CDK1" "UCHL5" "RRM2"
## [7] "SKA3" "SMC4" "CIP2A" "NDC1" "RRM2P3" "CENPU"
## [13] "NCAPG2" "CENPN" "ZNF714" "HMGB2" "PAICS" "CBX1"
## [19] "MELK" "DDIAS" "NDC80" "SMC2" "FANCI" "CSE1L"
## [25] "RAD51AP1" "NUP107" "RFC5" "SPC25" "CCNB2" "UBE2T"
## [31] "CEP55" "PBK" "RFC3" "SINHCAF" "POLR3G" "PAICSP4"
## [37] "MTHFD2" "DHX9" "PHACTR4" "MTF2"
##
## $Module_3
## [1] "NUP153" "NUP155" "PPAT" "BRCA2" "HS2ST1" "NAA50" "ATAD5"
## [8] "MMS22L" "SMCHD1" "NUP50" "XPOT" "SLC16A1" "UTP20" "SEH1L"
## [15] "QSER1" "ABCE1" "NOC3L" "ELOVL5" "SASS6" "NPAT" "ZNF850"
## [22] "IPO7" "PDS5B" "NUP58" "DNA2" "ATAD2" "CHD1" "AHCTF1"
## [29] "HAUS6P1" "USP1" "SCML2" "NUDT21" "KLHL23" "ZNF678" "PLOD2"
## [36] "LRRC8B" "ENC1" "FMR1" "MSH2" "XPO4"
##
## $Module_4
## [1] "OSBPL8" "RLIM" "RO60" "CAMSAP2" "ZFYVE16" "DNAJB14"
## [7] "LTN1" "ITCH" "CACNA2D1" "NHLRC2" "UBR3" "ZBTB41"
## [13] "TAF2" "ZNF451" "ERBIN" "SEL1L" "ATP11B" "SOCS4"
## [19] "NEDD4" "EXOC5" "ZNF800" "TMF1" "ARHGAP5" "SLC16A7"
## [25] "ITGAV" "ACSL4" "SMAD5" "BMPR1A" "MBNL1" "EDEM1"
## [31] "ZNF148" "ATP7A" "MOB1B" "MDFIC" "MAN2A1" "SP3"
## [37] "REST" "TNKS2" "MAN1A2" "MARCHF6"
##
## $Module_5
## [1] "THSD4" "LMF1" "IGFBP5" "APCDD1L" "KCNMA1" "H6PD" "PHLDB1"
## [8] "CPA4" "EPHB2" "SIDT2" "TNS1" "SEMA7A" "CD9" "NPTX1"
## [15] "MYPN" "MYADM" "GALNT10" "CCND1" "SLC17A5" "GM2A" "CDCP1"
## [22] "BACE1" "PLBD2" "MGAT5" "DHRS7" "CXCL12" "TGM2" "SLC16A4"
## [29] "KREMEN1" "FBLN5" "FAM120B" "LTBP2" "MBTPS1" "FOLR3" "GALNT11"
## [36] "PTPRA" "MEAK7" "PSG4" "LBH" "WNT5A"
##
## $Module_6
## [1] "RPS3AP6" "RPS3AP26" "RPS3AP5" "RPS3A" "RPL9" "RPS3AP21"
## [7] "RPL9P7" "RPL17P36" "RPS3AP20" "RPL7P9" "RPL15" "RPL7P32"
## [13] "RPS3AP25" "RPL4P4" "RPS3AP47" "RPL7" "RPL17P34" "RPL6"
## [19] "RPL5" "RPL39" "RPL5P4" "RPL7P26" "RPL4" "SNHG5"
## [25] "SF3B6" "RPL7P1" "RPL7P15" "RPL5P34" "RPL7P6" "RPL7P24"
## [31] "RPL39P3" "RPS20P14" "RPL6P27" "RPS23" "RPS25" "RPL17P43"
## [37] "RPS7P11" "RPS3AP49" "RPL24P7" "RPS4X"
##
## $Module_7
## [1] "ALYREF" "SMPD4" "TK1" "MAD2L2" "CHTF18"
## [6] "SAE1" "TOMM40" "DTYMK" "PPM1G" "KHSRP"
## [11] "TRAF4" "SLC25A29" "DCAF15" "HNRNPUL1" "UBN1"
## [16] "FBL" "HNRNPA1P63" "TCF3" "TOMM40P4" "SAFB"
## [21] "LRRC20" "LMNB2" "CPSF4" "TOMM40P1" "RNPS1P1"
## [26] "POP7" "SGTA" "RBM19" "SAFB2" "EZR"
## [31] "TOMM34" "H1-10" "SNHG7" "ITPKA" "CNIH2"
## [36] "CDKN2D" "LAS1L" "RANP1" "ARRB2" "TRAF2"
##
## $Module_8
## [1] "USP12" "PAK2" "RPRD1A" "ATP11C" "UBA6" "PANK3"
## [7] "ARPP19" "SMARCAD1" "TNPO1" "MAPK6" "USP15" "PAPOLA"
## [13] "MYSM1" "MARCHF7" "PPP4R2" "TAB2" "ZNF518B" "RP2"
## [19] "DNM1L" "STAG2" "KIF5B" "MOB1A" "QKI" "ITGA4"
## [25] "LYPLA1" "PGAP1" "PPP6R3" "PTPN4" "MORC3" "UBQLN1"
## [31] "FAM91A1" "AHR" "SPRED1" "PCNX4" "AIDAP1" "TTLL7"
## [37] "SOAT1" "BCLAF1P2" "CGGBP1" "SSX2IP"
##
## $Module_9
## [1] "FANCA" "HAS3" "POLE" "SMC1A" "SLC1A4" "SLC29A1" "NUP188"
## [8] "LZTS1" "RNF227" "PTBP1" "TCF19" "TFDP1" "MYO19" "TMEM201"
## [15] "MICAL3" "AKAP1" "SCAF4" "CDCA4" "ICMT" "CDT1" "NAA40"
## [22] "NPHP4" "SAP130" "GEMIN4" "TMEM109" "TONSL" "PASK" "TLN2"
## [29] "FHOD3" "CAD" "GPRIN1" "NUP214" "MCM2" "UTP4" "POFUT1"
## [36] "MANEAL" "DPF1" "MCM5" "CABLES2" "ACACB"
##
## $Module_10
## [1] "DSP" "CSGALNACT1" "CDH6" "TRPC6" "ANKLE2"
## [6] "CHST11" "CCN2" "CREB3L2" "SIRPA" "SMOC1"
## [11] "B4GALT1" "FLRT2" "BHLHE40" "AKAP12" "NOX4"
## [16] "ADAM19" "F3" "KCNH1" "TFPI2" "MYOCD"
## [21] "EDN1" "IL11" "PCNX1" "TCF7" "DCBLD1"
## [26] "CRISPLD2" "S1PR1" "TIMP3" "COL4A1" "ATP2A2"
## [31] "SLC22A23" "ABHD2" "SIRPB1" "HIVEP2" "PTGS2"
## [36] "TEX2" "CSTF2T" "UCK2" "PRSS23" "KLF7"
##
## $Module_11
## [1] "VARS1" "PPP1R12C" "GNA11" "SRM" "ARF1" "BCAR1"
## [7] "AP1M1" "MRPL4" "PGP" "PLCB3" "IMPDH1" "AACS"
## [13] "CNN2" "FARSA" "PSMG4" "BOP1" "DHX30" "TRIM11"
## [19] "ALDH16A1" "MFN2" "ATAD3C" "SIVA1" "GTPBP1" "CCDC9"
## [25] "CNP" "ATAD3B" "KLF16" "SMTN" "SMARCA4" "CNOT3"
## [31] "MBD3" "THOP1" "KIAA2013" "ALKBH5" "PREB" "PRADC1"
## [37] "TTLL12" "MTA1" "C19orf25" "APBA3"
##
## $Module_12
## [1] "CBARP" "CHMP1B" "DACT1" "TNFRSF1B" "INHBA" "STC1"
## [7] "PMEPA1" "IL1R1" "PRPSAP1" "TMEM120B" "SOX4" "CBLB"
## [13] "RASD2" "ETS2" "NR4A3" "SLC16A6" "CREM" "NFATC2"
## [19] "MAFK" "PITX2" "MSC" "NEDD9" "LIF" "NGF"
## [25] "PDGFA" "PPP1R3C" "IL4R" "SPHK1" "KLHL26" "MAP3K4"
## [31] "SSBP3" "NKX3-1" "SHROOM2" "NHS" "NFATC1" "F2RL1"
## [37] "TENT5A" "MECOM" "PITPNC1" "NR4A2"
##
## $Module_13
## [1] "CHRM2" "CPED1" "DSEL" "SNX18" "PRRX1"
## [6] "PRKCA" "CPEB2" "IGF2BP3" "NRP1" "CCDC50"
## [11] "CAMK2D" "CDC42EP3" "PCDH18" "FGD4" "ELK3"
## [16] "DENND10P1" "CDC42EP3P1" "LIX1L" "PRKAR1AP1" "TBCK"
## [21] "VCPIP1" "NBEA" "LINC00674" "WASF3" "WDR19"
## [26] "SNAI2" "PLAG1" "SEC22B3P" "SEC22B2P" "FMN2"
## [31] "PPP1R21" "SATB1" "RUNX2" "FNIP1" "NPEPPS"
## [36] "PDPK1" "MTCO1P2" "ARL2BP" "YWHAG" "NOP56"
##
## $Module_14
## [1] "KRTAP2-4" "KRTAP2-2" "KRTAP2-1" "KRTAP2-3" "FOSL1P1" "FOSL1"
## [7] "PRAG1" "FZD2" "BCAR3" "SEC14L1" "NAMPT" "CTH"
## [13] "NAMPTP1" "OSR1" "PRDM1" "PTPRE" "AGFG2" "DVL2"
## [19] "MVK" "AREG" "FZD7" "RYBP" "FOSB" "ATP8B1"
## [25] "TMEM87A" "TRAK1" "FRY" "NR3C2" "JCAD" "GRB10"
## [31] "RBMS2P1" "AMMECR1L" "CBX7" "ZFAND5" "ACVR1B" "MAMSTR"
## [37] "ELMO2" "RIPK2" "PIK3R1" "SASH1"
# 2. Generating adjacency matrix for hub genes from all modules
hub_genes<-unname(unlist(network_hub_list))
hubs_m<-TOM[hub_genes, hub_genes]
edges<-data.frame(from=rownames(hubs_m)[row(hubs_m)[upper.tri(hubs_m)]],
to=colnames(hubs_m)[col(hubs_m)[upper.tri(hubs_m)]],
value=hubs_m[upper.tri(hubs_m)])
# 3. Setting up thredshold for selecting leading edges in the network
leading_edges_ratio = 0.20 #top 1/5 edges based on edge weight.
threshold = quantile(edges$value, probs=1-leading_edges_ratio , na.rm = FALSE)
edges<-edges[edges$value >= threshold,]
# 4. Organizing vertices data frame and edges data frame for the network
vertices<-data.frame()
for(i in 1:length(network_hub_list)){
module_name<-names(network_hub_list)[i]
v<-data.frame(name = network_hub_list[[i]], module = rep(module_name, nTOM), size = rep(8, nTOM))
vertices<-rbind(vertices, v)
}
vertices[,"group"]<-unlist(lapply(vertices$module, function(x) strsplit(x, "_")[[1]][2]))
vertices[,"id"]<-rownames(vertices)
# 5. Centrality analysis of hub genes co-expression network
g<-graph_from_data_frame(edges, directed = F, vertices)
eigenvector<-eigen_centrality(g)$vector
betweenness<-betweenness(g)
degree<-degree(g)
closeness<-closeness(g)
centralities <- cbind(degree, eigenvector, betweenness, closeness)
centrality_df<-cbind(centralities, vertices$module)
#write.csv(centrality_df, file = "/xdisk/mpadi/jiawenyang/result/merkel_cell/pp_in_mcc_paper/table/IMR90_WGCNA_hubs_netwrok_centrality_analysis.csv")
DT::datatable(centrality_df) #show centrality data of nodes for sorting
# 6. Visualize the hub genes co-expression network
edges[,"source"]<-vertices[match(edges$from, vertices$name), "id"]
edges[,"target"]<-vertices[match(edges$to, vertices$name), "id"]
edges$source<-as.numeric(edges$source)
edges$target<-as.numeric(edges$target)
edges<-data.frame(source = edges$source-1, target = edges$target-1, value = edges$value)
vertices<-cbind(vertices, centralities)
vertices[,"betweenness_sqrt"]<-sqrt(vertices$betweenness)
color_df<-data.frame(color = color_pal, module = c(14,1,11,13,5,10,12,4,3,8,6,9,7,2))
color_df<-color_df[order(as.numeric(sub("\\D+", "", color_df$module))),] #match the module color with previous figures.
ColourScale <- 'd3.scaleOrdinal()
.domain(["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14"])
.range(["#9ECAE1", "#6A51A3", "#FDAE6B", "#FEE6CE", "#BAE4B3", "#F2F0F7", "#9E9AC8", "#E6550D", "#CBC9E2", "#74C476", "#3182BD", "#238B45", "#EDF8E9", "#DEEBF7"]);'
fn<-forceNetwork(Links = edges, Nodes = vertices,
Source = "source", Target = "target",
Nodesize = "betweenness_sqrt", fontSize = 12,
Value = "value", NodeID = "name",
Group = "group", opacity = 1,
legend = T, charge = -20, zoom = F,
opacityNoHover = 1,
colourScale= JS(ColourScale),
linkWidth = JS("function(d) { return Math.cbrt(d.value)*0.4; }")
)
fn